In [1]:
from __future__ import print_function
from tqdm import tqdm
import mxnet as mx
from mxnet import gluon
In [2]:
# Set context
data_ctx = mx.cpu()
model_ctx = mx.cpu()
In [3]:
num_inputs = 2
num_outputs = 1
num_examples = 10000
In [4]:
W1_real = 2.0
W2_real = -3.4
b_real = 4.2
In [5]:
def real_fn(X):
    return W1_real * X[:, 0] + W2_real * X[:, 1] + b_real
In [6]:
X = mx.nd.random.normal(shape=(num_examples, num_inputs))
noise = 0.01 * mx.nd.random.normal(shape=(num_examples,))
y = real_fn(X) + noise
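As a quick sanity check (illustrative, not one of the original cells), the first label should be the noiseless value of the generating function at the first row of X plus a small noise term:

print(real_fn(X[0:1]))
print(y[0])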
In [7]:
# Data iterator
batch_size = 4
train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y),
                                   batch_size=batch_size,
                                   shuffle=True)
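Peeking at a single batch shows what the iterator yields (an illustrative check; the shapes follow from batch_size and num_inputs above):

for data, label in train_data:
    print(data.shape, label.shape)  # expect (4, 2) and (4,)
    break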
In [8]:
# Define the model
net = gluon.nn.Dense(in_units=2, units=1)
In [9]:
print(net.weight)
print(net.bias)
In [10]:
# By default, weights and biases are of type Parameter.
print(type(net.weight))
print(type(net.bias))
In [11]:
# Collecting the parameters returns a ParameterDict (parameter dictionary)
net.collect_params()
Out[11]:
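The ParameterDict maps auto-generated names (e.g. a 'dense0_' prefix; the exact prefix depends on how many blocks have been created) to Parameter objects, so parameters can also be inspected by name. A minimal sketch:

for name, param in net.collect_params().items():
    print(name, param)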
In [12]:
type(net.collect_params())
Out[12]:
In [13]:
# Initialize with standard normal distribution
net.collect_params().initialize(mx.init.Normal(sigma=1.0), ctx=model_ctx)
In [14]:
print(net.weight.data())
print(net.bias.data())
In [15]:
example_data = mx.nd.array([[4,7]])
In [16]:
# Prediction with the randomly initialized parameters
net(example_data)
Out[16]:
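A Dense layer computes X W^T + b, so the prediction above can be reproduced by hand (an illustrative check using the randomly initialized parameters):

manual = mx.nd.dot(example_data, net.weight.data().T) + net.bias.data()
print(manual)
print(net(example_data))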
In [17]:
# It is not necessary to specify in_units:
# the parameters will be initialized lazily when data flows through the layer for the first time.
net = gluon.nn.Dense(units=1)
net.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=model_ctx)
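With deferred initialization the weight shape is only inferred on the first forward pass, which a quick check makes visible (illustrative, reusing example_data from above):

net(example_data)        # first forward pass triggers shape inference
print(net.weight.shape)  # now (1, 2)
print(net.weight.data())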
In [18]:
# Defining the loss as squared error
square_loss = gluon.loss.L2Loss()
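gluon.loss.L2Loss computes 0.5 * (prediction - label)^2 per example; a tiny check with made-up values:

pred = mx.nd.array([1.0, 2.0])
target = mx.nd.array([0.0, 0.0])
print(square_loss(pred, target))  # expect [0.5, 2.0]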
In [19]:
trainer = gluon.Trainer(params=net.collect_params(),
                        optimizer='sgd',
                        optimizer_params={'learning_rate': 0.0001})
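With the plain 'sgd' optimizer, each call to trainer.step(batch_size) amounts to the update w <- w - lr * grad / batch_size for every parameter. A hand-written sketch of that update (illustrative only, not how Trainer is implemented; the function is defined but not called here):

def manual_sgd_step(parameters, lr, batch_size):
    # w <- w - lr * grad / batch_size, applied in place to each parameter
    for param in parameters.values():
        param.data()[:] = param.data() - lr * param.grad() / batch_size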
In [20]:
epochs = 20
loss_sequence = []
num_batches = num_examples // batch_size
for e in range(epochs):
    cumulative_loss = 0
    # Iterate over the batches
    for i, (data, label) in tqdm(enumerate(train_data), total=num_batches):
        data = data.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx)
        with mx.autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()
        trainer.step(batch_size)
        cumulative_loss += mx.nd.mean(loss).asscalar()
    print("Epoch %s, average loss: %s" % (e, cumulative_loss / num_batches))
    loss_sequence.append(cumulative_loss)
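Optionally, the recorded loss_sequence can be plotted to confirm that the loss decreases over the epochs (assumes matplotlib is installed; not part of the cells above):

import matplotlib.pyplot as plt

plt.plot(loss_sequence)
plt.xlabel('epoch')
plt.ylabel('cumulative loss')
plt.show()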
In [21]:
# ParameterDict
params = net.collect_params()
In [22]:
for param in params.values():
    print(param.name, param.data())
In [23]:
list(params.values())[0].data()
Out[23]:
In [24]:
[W1, W2] = list(params.values())[0].data()[0]
In [25]:
b = list(params.values())[1].data()[0]
In [26]:
# True values
print(W1_real)
print(W2_real)
print(b_real)
In [27]:
# Learned values
print(W1.asscalar())
print(W2.asscalar())
print(b.asscalar())
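A quick way to quantify how close the learned parameters are to the true ones (illustrative):

print(abs(W1.asscalar() - W1_real))
print(abs(W2.asscalar() - W2_real))
print(abs(b.asscalar() - b_real))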